library(stringr)
library(plyr)
library(igraph)

# Load the raw retweet data (GEN tweets).
data <- readRDS("./RT_GEN.rds")

# Lookup table mapping each politician account to its party and coalition.
# NOTE(review): read.table() defaults to header = FALSE, so a header row in
# the published sheet would be read as a data row -- confirm the sheet has none.
parties <- read.table("https://docs.google.com/spreadsheets/d/e/2PACX-1vS8YvrH7vdITT1dfchlAEFlJZRB12yndqw-Ia2O6hUqf-DxQrNKeiL1MnSmJnu09dnCFCUsD6MuaMRk/pub?gid=718455706&single=true&output=csv", quote="\"", comment.char="",sep = ",",col.names = c("coalition", "party","account"))
parties <- subset(parties, !is.na(parties$coalition)) # remove AP and CP

# Account names are compared case-insensitively, so lower-case both sides.
parties$account <- tolower(parties$account)
trim <- function (x) sub('@','',x) # remove @ symbol from user names

# Extract the retweeted account from the leading "RT @name" of each tweet.
# str_match() is vectorised: column 2 of its result is the first capture
# group, and is NA for tweets that do not start with "RT @...". Calling it
# once on the whole column avoids a per-tweet sapply(), which is slow, names
# every element with the full tweet text, and returns a list on empty input.
data$account <- tolower(trim(str_match(data$text, "^RT (@[[:alnum:]_]*)")[, 2]))

# Attach party and coalition to every retweet (plyr::join defaults to a left
# join, so retweets of accounts missing from `parties` are kept with NA).
data <- join(data, parties, by = "account")

# Keep only the columns needed downstream.
data_small <- data.frame(data$user.screen_name,data$account,data$party)
names(data_small) <- c("username","politician","party")
rm(data)

# Build the user x party proximity table: for every user, the share of their
# retweets that went to each party (each row sums to 1).
user_party <- droplevels(
  data.frame(user = data_small$username, party = data_small$party)
)

# Retweet counts per (user, party). unclass() turns the contingency table
# into a plain integer matrix so that as.data.frame() below yields one column
# per party rather than the long (user, party, Freq) layout of a table.
party_counts <- unclass(table(user_party))
retweets_per_user <- rowSums(party_counts)

# Row-normalise. The row totals recycle down the columns, so every cell is
# divided by its own row's total. Users with no retweet of a known party have
# a total of 0 and become NaN (0 / 0); those rows are kept in the matrix.
x <- party_counts / retweets_per_user
pol_prox_matrix <- x

# Drop the all-NaN users by testing the row total directly instead of one
# hard-coded party column, so the filter still works when a given party
# (e.g. FI) is absent from the data.
has_party_retweets <- retweets_per_user > 0
df_pol_prox <- as.data.frame(pol_prox_matrix[has_party_retweets, , drop = FALSE])
df_pol_prox$username <- rownames(df_pol_prox)

saveRDS(df_pol_prox, file="./user_proximity_gen.rds")

#### descriptive statistics #####
# Distribution of the per-user retweet share, one summary per party.
summary(df_pol_prox$Civica_popolare)
summary(df_pol_prox$FdI)
summary(df_pol_prox$FI)
summary(df_pol_prox$Insieme)
summary(df_pol_prox$LeU)
summary(df_pol_prox$LN)
summary(df_pol_prox$M5S)
summary(df_pol_prox$PD)
summary(df_pol_prox$Piu_Europa)
summary(df_pol_prox$PP)

# Number of users with at least one retweet of each party (share > 0).
# Missing values are not counted.
sum(df_pol_prox$Civica_popolare > 0, na.rm = TRUE)
sum(df_pol_prox$FdI > 0, na.rm = TRUE)
sum(df_pol_prox$FI > 0, na.rm = TRUE)
sum(df_pol_prox$Insieme > 0, na.rm = TRUE)
sum(df_pol_prox$LeU > 0, na.rm = TRUE)
sum(df_pol_prox$LN > 0, na.rm = TRUE)
sum(df_pol_prox$M5S > 0, na.rm = TRUE)
sum(df_pol_prox$PD > 0, na.rm = TRUE)
sum(df_pol_prox$Piu_Europa > 0, na.rm = TRUE)
sum(df_pol_prox$PP > 0, na.rm = TRUE)

#### network #####
### Bipartite network: users -> politicians.
### After clustering,
###   data.frame(user = V(gt)$name, community = V(gt)$community)
### gives a data frame of vertex names and their communities.
library(igraph)

# The original call passed `t`, which is never assigned in this script and so
# resolved to base::t (the transpose function) rather than a matrix. Build the
# incidence matrix explicitly instead: rows are users, columns are retweeted
# accounts, cells are retweet counts.
# NOTE(review): inferred from the "users -> politicians" comment above --
# confirm this is the matrix that was intended.
retweet_counts <- unclass(
  table(user = data_small$username, politician = data_small$politician)
)

# weighted = TRUE stores the counts as the edge attribute `weight`, which
# cluster_louvain() uses by default.
gt <- graph.incidence(retweet_counts, weighted = TRUE)
community <- cluster_louvain(gt)
V(gt)$community <- community$membership